rm(list =ls())
options(scipen=999)
gc()
packages <-c("tidyverse","estimatr","plm","stargazer","fastDummies","readstata13","ihs","AER")

new.packages <- packages[!(packages %in% installed.packages()[,"Package"])]
if(length(new.packages)) install.packages(new.packages)

lapply(packages, require, character.only = TRUE)
rm(packages, new.packages)

setwd("PUT YOUR DIRECTORY HERE")

## insheet data
data <- read.csv("./Datasets/panel_bare_bones.csv")

## clean data
data$X <- NULL
data$locality_year <- NULL
data$subbotnik_inkind <- NULL
data$locality <- as.character(tolower(data$locality))

# load covariates, including spy priests
data <- merge(data, read.csv("./Datasets/main_confounders.csv")[,c("locality", "region", "pop", "schools", "shops", "restaurants", "cinemas", "Frac48", "coal_sqm", "minerals", "partition", "priests_continuous", "pop")], by = c("locality"), all.x = T)
data$region_numeric <- as.numeric(as.factor(data$region))


################################
### REGRESSIONS FOR SABOTAGE ###
################################

#subset to years with observations:
data_t <- data[which(data$year<1980),]

# turn subbotnik upside down so that it accurately capture sabotage
data_t$subbotnik_inkind_ <- data_t$subbotnik_inkind_*-1

# create wealth index
data_t$shops <- scale(data_t$shops)
data_t$restaurants <- scale(data_t$restaurants)
data_t$cinemas <- scale(data_t$cinemas)
data_t$wealth_index <- rowSums(data_t[,c("shops","restaurants","cinemas")], na.rm = T)
head(data_t[,c("shops", "restaurants", "cinemas", "wealth_index")], 50)
data_t$wealth_index[data_t$wealth_index == "NaN"] = NA  
data_t$wealth_index <- scale(data_t$wealth_index)

# create state capacity index
data_t$state_capacity_index <- scale(data_t$schools)

# create ethnic diversity index
data_t$ethnic_diversity_index <- scale(data_t$Frac48)

# create Russian occupation index
data_t$Russian_occupation_index <- ifelse(data_t$partition=="Russian",1,0)
data_t$Russian_occupation_index <- scale(data_t$Russian_occupation_index)

# create industrialization index
data_t$coal_sqm <- scale(data_t$coal_sqm)
data_t$minerals <- scale(data_t$minerals)
data_t$industrialization_index <- rowSums(data_t[,c("coal_sqm","minerals")], na.rm = F)
data_t$industrialization_index[data_t$industrialization_index == "NaN"] = NA  
data_t$industrialization_index <- scale(data_t$industrialization_index)

# average over years
data_agg <- aggregate(data_t[,c("locality_numeric", "subbotnik_inkind_", "commanders", 
                                "priests_continuous", "pop", "region_numeric", "wealth_index", 
                                "state_capacity_index", "ethnic_diversity_index", "Russian_occupation_index", 
                                "industrialization_index")], by=list(Category=data_t$locality_numeric), FUN=mean, na.rm=T)


# regressions
model_sabotage_bare = ivreg(scale(subbotnik_inkind_) ~ commanders | priests_continuous, data = data_agg)
model_sabotage_ctrl = ivreg(scale(subbotnik_inkind_) ~ commanders | priests_continuous + wealth_index + state_capacity_index + ethnic_diversity_index + Russian_occupation_index + industrialization_index + factor(region_numeric), data = data_agg)



###############################
### REGRESSIONS FOR STRIKES ###
###############################

# subset to years with observations:
data_t_late <- data[which(data$year>1979),]

# average across years
data_agg_late <- aggregate(data_t_late[,c("locality_numeric", "strikes", "commanders", "priests_continuous", "region_numeric")], by=list(Category=data_t_late$locality_numeric), FUN=mean, na.rm=T)

# attach covariates from above
data_agg_late <- merge(data_agg_late, data_agg[,c("pop", "locality_numeric", "wealth_index", "state_capacity_index", "ethnic_diversity_index", "Russian_occupation_index", "industrialization_index")], by = "locality_numeric", all.x = T)

# regressions
model_strikes_bare = ivreg(scale(strikes) ~ commanders | priests_continuous, data = data_agg_late)
model_strikes_ctrl = ivreg(scale(strikes) ~ commanders | priests_continuous + pop + wealth_index + state_capacity_index + ethnic_diversity_index + Russian_occupation_index + industrialization_index + factor(region_numeric), data = data_agg_late)



##### OUTPUT

stargazer(model_sabotage_bare, model_sabotage_ctrl, model_strikes_bare, model_strikes_ctrl,
          dep.var.labels = c("Sabotage", "Protests"),
          style = "qje",
          covariate.labels = c("Commanders"),
          star.char = c("*", "**", "***"),
          star.cutoffs = c(0.1, 0.05, 0.01),
          omit = c("Constant"),
          omit.stat = c("rsq", "ser"), 
          omit.table.layout = "n",
          add.lines = list(c("Controls", "No", "Yes", "No", "Yes"),
                           c("FEs", "No", "Yes", "No", "Yes")),
          out = "PUT YOUR FILEPATH HERE")

